In [1]:
##### import packages
#base
import os
import sys
from collections import defaultdict
import numpy as np
import scipy.stats
from matplotlib import pyplot as plt
import random
# import pyreadr
import pandas as pd
import seaborn as sns
%matplotlib inline
%autosave 30

#misc
import pickle
# import torch.nn.functional as F
import itertools
import time
import umap
import plotly
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# import homebrew modules
import models.tomtom_models as tm
import models.tomtom_util as tu
Autosaving every 30 seconds
In [2]:
global tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d
global ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d
global tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d

# import pickled data
with open('data/tomtom_data_preprocessed.pkl','rb') as f:
    [tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d,
    ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d,
    tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d] = pickle.load(f) 
In [3]:
# import fitted models
with open('tomtom_fitted_models.pkl','rb') as f:
    [seeds_self_norm_all_grp,maps_self_norm_all_grp,logprobs_self_norm_all_grp,mems_self_norm_all_grp,
     seeds_self_norm_all_dim,maps_self_norm_all_dim,logprobs_self_norm_all_dim,
     seeds_self_norm_noauto_grp,maps_self_norm_noauto_grp,logprobs_self_norm_noauto_grp,mems_self_norm_noauto_grp,
     seeds_self_norm_noauto_dim,maps_self_norm_noauto_dim,logprobs_self_norm_noauto_dim,
     seeds_self_raw_all_grp,maps_self_raw_all_grp,logprobs_self_raw_all_grp,mems_self_raw_all_grp,
     seeds_self_raw_all_dim,maps_self_raw_all_dim,logprobs_self_raw_all_dim,
     seeds_self_raw_noauto_grp,maps_self_raw_noauto_grp,logprobs_self_raw_noauto_grp,mems_self_raw_noauto_grp,
     seeds_self_raw_noauto_dim,maps_self_raw_noauto_dim,logprobs_self_raw_noauto_dim] = pickle.load(f)
In [4]:
def pca_scree(data):
    scaler = StandardScaler()
    data = data.reshape(data.shape[0],-1)
    dt_scaled = scaler.fit_transform(data)
    decomp = PCA()
    dtdecomp = decomp.fit(dt_scaled)
    plt.scatter(list(range(np.shape(dtdecomp.explained_variance_ratio_)[0])),np.array(dtdecomp.explained_variance_ratio_))
    
def viz_umap_2d(data,mem,n_nbr = 50,min_dist = .1,random_state = 9):
    random.seed(random_state)
    mapper = umap.UMAP(n_neighbors = n_nbr, min_dist = min_dist)
    data = data.reshape(data.shape[0],-1)
    udt = mapper.fit_transform(data)
    sns.scatterplot(udt[:,0],udt[:,1],hue = mem)

def viz_umap_3d(data,mem,n_nbr = 50,min_dist = .1,random_state = 9):
    random.seed(random_state)
    mapper = umap.UMAP(n_neighbors = n_nbr, min_dist = min_dist, n_components = 3)
    data = data.reshape(data.shape[0],-1)
    udt = mapper.fit_transform(data)
    # Configure Plotly to be rendered inline in the notebook.
    plotly.offline.init_notebook_mode()
    trace = go.Scatter3d(
        x=udt[:,0], 
        y=udt[:,1],  
        z=udt[:,2], 
        mode='markers',
        marker={
            'size': 10,
            'opacity': 0.8,
            'color': mem,
        }
    )
    # Configure the layout.
    layout = go.Layout(
        margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
    )
    plot_figure = go.Figure(data=[trace], layout=layout)
    # Render the plot.
    plotly.offline.iplot(plot_figure)

PCA-scree to identify optimal K

In [5]:
pca_scree(tself_raw_noauto_3d)

Visualize with membership tags from K = 2 model

In [6]:
viz_umap_2d(tself_raw_noauto_3d,mems_self_raw_noauto_grp[1])
/home/zidong/.conda/envs/tomtom-env/lib/python3.6/site-packages/numba/np/ufunc/parallel.py:363: NumbaWarning:

The TBB threading layer requires TBB version 2019.5 or later i.e., TBB_INTERFACE_VERSION >= 11005. Found TBB_INTERFACE_VERSION = 6103. The TBB threading layer is disabled.

/home/zidong/.conda/envs/tomtom-env/lib/python3.6/site-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

In [7]:
viz_umap_3d(tself_raw_noauto_3d,mems_self_raw_noauto_grp[1])

Visualize with membership tags from K = 6 model

In [8]:
viz_umap_2d(tself_raw_noauto_3d,mems_self_raw_noauto_grp[5])
/home/zidong/.conda/envs/tomtom-env/lib/python3.6/site-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

In [9]:
viz_umap_3d(tself_raw_noauto_3d,mems_self_raw_noauto_grp[5])